In [2]:
from __future__ import print_function
import os.path
import dalmatian as dm
import pandas as pd
import sys
sys.path.insert(0, '../../')
#import Datanalytics as da 
from JKBio import TerraFunction as terra
%load_ext autoreload
%autoreload 2
from JKBio import Helper as h

import pickle
from taigapy import TaigaClient
tc = TaigaClient()
import numpy as np
import itertools

from bokeh.plotting import *
from bokeh.models import HoverTool
output_notebook()
import matplotlib.pyplot as plt
%load_ext rpy2.ipython
import seaborn as sns
import gseapy
from JKBio.helper import pyDESeq2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import AgglomerativeClustering

from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
Loading BokehJS ...

getting data

In [7]:
! gsutil mv gs://transfer-amlproject/*MP7624* gs://transfer-amlproject/RNPv2/
Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz...     

==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz...       

==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.


Operation completed over 138 objects/240.6 GiB.                                  
In [8]:
! gsutil -m cp -r gs://transfer-amlproject/RNPv3 gs://amlproject/RNA/
Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]...
\ [138/138 files][240.6 GiB/240.6 GiB] 100% Done                                
Operation completed over 138 objects/240.6 GiB.                                  
In [10]:
! gsutil ls gs://amlproject/
gs://amlproject/MV-4-11.bai
gs://amlproject/MV-4-11.bam
gs://amlproject/Chip/
gs://amlproject/RNA/
gs://amlproject/RNPv2/
In [3]:
sampleset='RNPv3'
In [16]:
terra.uploadFromFolder('amlproject','RNPv2/',
                       'broad-firecloud-ccle/hg38_RNAseq',samplesetname=sampleset,
                      fformat="fastqR1R2", sep='_MP7624')
please be sure you gave access to your terra email account access to this bucket
['RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz', 'RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz', 'RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz', 'RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz', 'RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz', 'RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz', 'RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz', 'RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz']
> /home/jeremie/JKBio/TerraFunction.py(227)uploadFromFolder()
    226     ipdb.set_trace()
--> 227     df = pd.DataFrame(data)
    228     print(df)

ipdb> c
      sample_id                                             fastq1  \
0   20200304_10  gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...   
1   20200304_11  gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...   
2   20200304_12  gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...   
3   20200304_13  gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...   
4   20200304_14  gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...   
..          ...                                                ...   
64  20200304_69  gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...   
65   20200304_6  gs://amlproject/RNPv2/20200304_6_MP7624_S6_R1_...   
66   20200304_7  gs://amlproject/RNPv2/20200304_7_MP7624_S7_R1_...   
67   20200304_8  gs://amlproject/RNPv2/20200304_8_MP7624_S8_R1_...   
68   20200304_9  gs://amlproject/RNPv2/20200304_9_MP7624_S9_R1_...   

                                               fastq2  
0   gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...  
1   gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...  
2   gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...  
3   gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...  
4   gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...  
..                                                ...  
64  gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...  
65  gs://amlproject/RNPv2/20200304_6_MP7624_S6_R2_...  
66  gs://amlproject/RNPv2/20200304_7_MP7624_S7_R2_...  
67  gs://amlproject/RNPv2/20200304_8_MP7624_S8_R2_...  
68  gs://amlproject/RNPv2/20200304_9_MP7624_S9_R2_...  

[69 rows x 3 columns]
Successfully imported 69 participants.
Successfully imported 69 samples.
Successfully imported 1 sample sets:
  * MAX_AML_RNPv2 (69 samples)

Processing

In [4]:
wm = dm.WorkspaceManager('broad-firecloud-ccle/hg38_RNAseq')
In [19]:
submission_id = wm.create_submission("star_v1-0_BETA_cfg", sampleset, 'sample_set',expression='this.samples')
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 2ad41571-b46e-4c3b-be51-44e800717d2a.
In [4]:
submission_id = wm.create_submission("rsem_v1-0_BETA_cfg", 
                                      sampleset,'sample_set',expression='this.samples')
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission cfd65243-2093-4007-9b21-c5b09c9fc875.
1tatus is: Failed for 0 jobs in submission 0. 2 mn elapsed.
10
11
12
13
14
15
16
17
18
19
2
20
21
22
23
24
25
26
27
28
29
3
30
31
32
33
34
35
36
37
38
39
4
40
41
42
43
44
45
46
47
48
49
5
50
51
52
53
54
55
56
57
58
59
6
60
61
62
63
64
65
66
67
68
69
7
70
71
72
73
8
9
0.0 of jobs Succeeded in submission 0.
-----------------------------------------------
RuntimeError  Traceback (most recent call last)
<ipython-input-4-50c8187cd693> in <module>
      1 submission_id = wm.create_submission("rsem_v1-0_BETA_cfg", 
      2                                       sampleset,'sample_set',expression='this.samples')
----> 3 terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)

~/JKBio/TerraFunction.py in waitForSubmission(workspace, submissions, raise_errors)
     93         print(str(done / (done + failed)) + " of jobs Succeeded in submission " + str(scount) + ".")
     94   if len(failed_submission) > 0 and raise_errors:
---> 95     raise RuntimeError(str(len(failed_submission)) + " failed submission")
     96   return failed_submission
     97   # print and return well formated data

RuntimeError: 73 failed submission
In [5]:
submission_id = wm.create_submission("rsem_aggregate_results_v1-0_BETA_cfg", 
                                         sampleset)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 9be600dc-4db0-4af1-b607-503800cc45fc.
1.0 of jobs Succeeded in submission 0.sion 0. 210 mn elapsed.
Out[5]:
[]
In [5]:
results = wm.get_sample_sets().loc[sampleset]
rsem_genes_expected_count = results['rsem_genes_expected_count']
In [7]:
results
Out[7]:
samples                            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1, 20...
rsem_transcripts_isopct            gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_transcripts_tpm               gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_transcripts_expected_count    gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_genes_tpm                     gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_genes_expected_count          gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
Name: RNPv3, dtype: object
In [8]:
mkdir ../../data/RNPv3
In [9]:
! gsutil cp $rsem_genes_expected_count ../../data/RNPv3/
Copying gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcbabe2da/9be600dc-4db0-4af1-b607-503800cc45fc/rsem_aggregate_results_workflow/abca308c-59a2-4ad5-8c87-9e4bdf407411/call-rsem_aggregate_results/RNPv3.rsem_genes_expected_count.txt.gz...
/ [1 files][  4.6 MiB/  4.6 MiB]                                                
Operation completed over 1 objects/4.6 MiB.                                      
In [6]:
file = '../../data/RNPv3/'+rsem_genes_expected_count.split('/')[-1]
In [11]:
file
Out[11]:
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
In [12]:
! gunzip $file

getting spike in scales

In [ ]:
dd
In [209]:
! bwa index -a bwtsw ../data/ERCC92/ERCC92.fa
[bwa_index] Pack FASTA... 0.00 sec
[bwa_index] Construct BWT for the packed sequence...
[BWTIncCreate] textLength=165512, availableWord=210772
[bwt_gen] Finished constructing BWT in 5 iterations.
[bwa_index] 0.02 seconds elapse.
[bwa_index] Update BWT... 0.00 sec
[bwa_index] Pack forward-only FASTA... 0.00 sec
[bwa_index] Construct SA from BWT and Occ... 0.01 sec
[main] Version: 0.7.5-r404
[main] CMD: bwa index -a bwtsw ../data/ERCC92/ERCC92.fa
[main] Real time: 0.162 sec; CPU: 0.032 sec
In [210]:
! samtools faidx ../data/ERCC92/ERCC92.fa
In [215]:
from JKBio import Helper as h
In [8]:
! ../../TrimGalore-0.6.5/trim_galore
Multicore support not enabled. Proceeding with single-core trimming.
Path to Cutadapt set as: 'cutadapt' (default)
Cutadapt seems to be working fine (tested command 'cutadapt --version')
Cutadapt version: 2.8
single-core operation.
No quality encoding type selected. Assuming that the data provided uses Sanger encoded Phred scores (default)


Please provide the filename(s) of one or more FastQ file(s) to launch Trim Galore!

USAGE:  'trim_galore [options] <filename(s)>'    or    'trim_galore --help'    for more options

In [11]:
ls -alh res
total 138M
drwxr-xr-x 2 jeremie jeremie 4.0K Mar 13 18:48 ./
drwxr-xr-x 5 jeremie jeremie 4.0K Mar 13 18:48 ../
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_10_MP7624_S10_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  15M Mar 13 18:48 20200304_10_MP7624_S10_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_11_MP7624_S11_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_11_MP7624_S11_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_12_MP7624_S12_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  15M Mar 13 18:48 20200304_12_MP7624_S12_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_13_MP7624_S13_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_13_MP7624_S13_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_14_MP7624_S14_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_14_MP7624_S14_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  763 Mar 13 18:48 20200304_15_MP7624_S15_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_15_MP7624_S15_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_16_MP7624_S16_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_16_MP7624_S16_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_17_MP7624_S17_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_17_MP7624_S17_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_18_MP7624_S18_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  14M Mar 13 18:48 20200304_18_MP7624_S18_R1_001_trimmed.fq.gz
-rw-r--r-- 1 jeremie jeremie  764 Mar 13 18:48 20200304_19_MP7624_S19_R1_001.fastq.gz_trimming_report.txt
-rw-r--r-- 1 jeremie jeremie  15M Mar 13 18:48 20200304_19_MP7624_S19_R1_001_trimmed.fq.gz
In [50]:
fastqs = !ls res/*.sam
[autoreload of JKBio.Helper failed: Traceback (most recent call last):
  File "/home/jeremie/.local/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/jeremie/.local/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/usr/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/usr/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 724, in exec_module
  File "<frozen importlib._bootstrap_external>", line 860, in get_code
  File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "../../JKBio/Helper.py", line 676
    file[1] + ' | ' pathtosam + ' sort - -o ' + results + file.split('.')[0] + '.sorted.bam'
                            ^
SyntaxError: invalid syntax
]
In [52]:
h.parrun(['samtools sort '+i+' -@ 8 -o '+i.split('.')[0]+'.sorted.bam' for i in fastqs], cores=1, add=['rm '+i for i in fastqs])
In [82]:
fastqs = !ls res/*.bam
fastqs = [v for i ,v in enumerate(fastqs) if i <30]
fastqs
Out[82]:
['res/20200304_10_MP7624_S10_R1_001_val_1.sorted.bam',
 'res/20200304_11_MP7624_S11_R1_001_val_1.sorted.bam',
 'res/20200304_12_MP7624_S12_R1_001_val_1.sorted.bam',
 'res/20200304_13_MP7624_S13_R1_001_val_1.sorted.bam',
 'res/20200304_14_MP7624_S14_R1_001_val_1.sorted.bam',
 'res/20200304_15_MP7624_S15_R1_001_val_1.sorted.bam',
 'res/20200304_16_MP7624_S16_R1_001_val_1.sorted.bam',
 'res/20200304_17_MP7624_S17_R1_001_val_1.sorted.bam',
 'res/20200304_18_MP7624_S18_R1_001_val_1.sorted.bam',
 'res/20200304_19_MP7624_S19_R1_001_val_1.sorted.bam',
 'res/20200304_1_MP7624_S1_R1_001_val_1.sorted.bam',
 'res/20200304_20_MP7624_S20_R1_001_val_1.sorted.bam',
 'res/20200304_21_MP7624_S21_R1_001_val_1.sorted.bam',
 'res/20200304_22_MP7624_S22_R1_001_val_1.sorted.bam',
 'res/20200304_23_MP7624_S23_R1_001_val_1.sorted.bam',
 'res/20200304_24_MP7624_S24_R1_001_val_1.sorted.bam',
 'res/20200304_25_MP7624_S25_R1_001_val_1.sorted.bam',
 'res/20200304_26_MP7624_S26_R1_001_val_1.sorted.bam',
 'res/20200304_27_MP7624_S27_R1_001_val_1.sorted.bam',
 'res/20200304_28_MP7624_S28_R1_001_val_1.sorted.bam',
 'res/20200304_29_MP7624_S29_R1_001_val_1.sorted.bam',
 'res/20200304_2_MP7624_S2_R1_001_val_1.sorted.bam',
 'res/20200304_30_MP7624_S30_R1_001_val_1.sorted.bam',
 'res/20200304_31_MP7624_S31_R1_001_val_1.sorted.bam',
 'res/20200304_32_MP7624_S32_R1_001_val_1.sorted.bam',
 'res/20200304_33_MP7624_S33_R1_001_val_1.sorted.bam',
 'res/20200304_34_MP7624_S34_R1_001_val_1.sorted.bam',
 'res/20200304_35_MP7624_S35_R1_001_val_1.sorted.bam',
 'res/20200304_36_MP7624_S36_R1_001_val_1.sorted.bam',
 'res/20200304_37_MP7624_S37_R1_001_val_1.sorted.bam']
In [84]:
h.getSpikeInControlScales('../data/ERCC92/ERCC92.fa', fastq=list(fastqs), mapper='bwa', pairedEnd=True, cores=10, pathtosam='samtools', pathtotrim_galore='../../TrimGalore-0.6.5/trim_galore', pathtobwa='bwa',totrim=False, tomap=False, tofilter=True, results='res/', toremove=True)
if paired_end, need to be name_*1, name_*2
your files need to be all in the same folder
['20200304_10_MP7624_S10_R1_001_val_1.sorted.bam', '20200304_11_MP7624_S11_R1_001_val_1.sorted.bam', '20200304_12_MP7624_S12_R1_001_val_1.sorted.bam', '20200304_13_MP7624_S13_R1_001_val_1.sorted.bam', '20200304_14_MP7624_S14_R1_001_val_1.sorted.bam', '20200304_15_MP7624_S15_R1_001_val_1.sorted.bam', '20200304_16_MP7624_S16_R1_001_val_1.sorted.bam', '20200304_17_MP7624_S17_R1_001_val_1.sorted.bam', '20200304_18_MP7624_S18_R1_001_val_1.sorted.bam', '20200304_19_MP7624_S19_R1_001_val_1.sorted.bam', '20200304_1_MP7624_S1_R1_001_val_1.sorted.bam', '20200304_20_MP7624_S20_R1_001_val_1.sorted.bam', '20200304_21_MP7624_S21_R1_001_val_1.sorted.bam', '20200304_22_MP7624_S22_R1_001_val_1.sorted.bam', '20200304_23_MP7624_S23_R1_001_val_1.sorted.bam', '20200304_24_MP7624_S24_R1_001_val_1.sorted.bam', '20200304_25_MP7624_S25_R1_001_val_1.sorted.bam', '20200304_26_MP7624_S26_R1_001_val_1.sorted.bam', '20200304_27_MP7624_S27_R1_001_val_1.sorted.bam', '20200304_28_MP7624_S28_R1_001_val_1.sorted.bam', '20200304_29_MP7624_S29_R1_001_val_1.sorted.bam', '20200304_2_MP7624_S2_R1_001_val_1.sorted.bam', '20200304_30_MP7624_S30_R1_001_val_1.sorted.bam', '20200304_31_MP7624_S31_R1_001_val_1.sorted.bam', '20200304_32_MP7624_S32_R1_001_val_1.sorted.bam', '20200304_33_MP7624_S33_R1_001_val_1.sorted.bam', '20200304_34_MP7624_S34_R1_001_val_1.sorted.bam', '20200304_35_MP7624_S35_R1_001_val_1.sorted.bam', '20200304_36_MP7624_S36_R1_001_val_1.sorted.bam', '20200304_37_MP7624_S37_R1_001_val_1.sorted.bam']
you need to have your files in the res/ folder


filtering




counting


Out[84]:
({'20200304_10_MP7624_S10_R1_001_val_1': 0.16898942258544017,
  '20200304_11_MP7624_S11_R1_001_val_1': 0.2137747794550614,
  '20200304_12_MP7624_S12_R1_001_val_1': 0.1744152888937967,
  '20200304_13_MP7624_S13_R1_001_val_1': 0.6105037644754658,
  '20200304_14_MP7624_S14_R1_001_val_1': 1.0,
  '20200304_15_MP7624_S15_R1_001_val_1': 0.5340835638285261,
  '20200304_16_MP7624_S16_R1_001_val_1': 0.35631786259352977,
  '20200304_17_MP7624_S17_R1_001_val_1': 0.6156760495423441,
  '20200304_18_MP7624_S18_R1_001_val_1': 0.727821906530712,
  '20200304_19_MP7624_S19_R1_001_val_1': 0.07483668099259128,
  '20200304_1_MP7624_S1_R1_001_val_1': 0.47783743300316456,
  '20200304_20_MP7624_S20_R1_001_val_1': 0.3124741021318431,
  '20200304_21_MP7624_S21_R1_001_val_1': 0.31825232376633517,
  '20200304_22_MP7624_S22_R1_001_val_1': 0.9675827613577999,
  '20200304_23_MP7624_S23_R1_001_val_1': 0.5763662983450001,
  '20200304_24_MP7624_S24_R1_001_val_1': 0.6390725535862314,
  '20200304_25_MP7624_S25_R1_001_val_1': 0.8755348849786047,
  '20200304_26_MP7624_S26_R1_001_val_1': 0.7450538798837408,
  '20200304_27_MP7624_S27_R1_001_val_1': 0.7762981432726411,
  '20200304_28_MP7624_S28_R1_001_val_1': 0.7081911099506929,
  '20200304_29_MP7624_S29_R1_001_val_1': 0.8619769958886765,
  '20200304_2_MP7624_S2_R1_001_val_1': 0.1737388317027055,
  '20200304_30_MP7624_S30_R1_001_val_1': 0.809013539047738,
  '20200304_31_MP7624_S31_R1_001_val_1': 0.3852663157088453,
  '20200304_32_MP7624_S32_R1_001_val_1': 0.28762189014952155,
  '20200304_33_MP7624_S33_R1_001_val_1': 0.32142600106714314,
  '20200304_34_MP7624_S34_R1_001_val_1': 0.7614738906061448,
  '20200304_35_MP7624_S35_R1_001_val_1': 0.6038299707448979,
  '20200304_36_MP7624_S36_R1_001_val_1': 0.46878297111481504,
  '20200304_37_MP7624_S37_R1_001_val_1': 0.7230628158623067},
 {'20200304_10_MP7624_S10_R1_001_val_1': 3065683,
  '20200304_11_MP7624_S11_R1_001_val_1': 2423429,
  '20200304_12_MP7624_S12_R1_001_val_1': 2970313,
  '20200304_13_MP7624_S13_R1_001_val_1': 848591,
  '20200304_14_MP7624_S14_R1_001_val_1': 518068,
  '20200304_15_MP7624_S15_R1_001_val_1': 970013,
  '20200304_16_MP7624_S16_R1_001_val_1': 1453949,
  '20200304_17_MP7624_S17_R1_001_val_1': 841462,
  '20200304_18_MP7624_S18_R1_001_val_1': 711806,
  '20200304_19_MP7624_S19_R1_001_val_1': 6922648,
  '20200304_1_MP7624_S1_R1_001_val_1': 1084193,
  '20200304_20_MP7624_S20_R1_001_val_1': 1657955,
  '20200304_21_MP7624_S21_R1_001_val_1': 1627853,
  '20200304_22_MP7624_S22_R1_001_val_1': 535425,
  '20200304_23_MP7624_S23_R1_001_val_1': 898852,
  '20200304_24_MP7624_S24_R1_001_val_1': 810656,
  '20200304_25_MP7624_S25_R1_001_val_1': 591716,
  '20200304_26_MP7624_S26_R1_001_val_1': 695343,
  '20200304_27_MP7624_S27_R1_001_val_1': 667357,
  '20200304_28_MP7624_S28_R1_001_val_1': 731537,
  '20200304_29_MP7624_S29_R1_001_val_1': 601023,
  '20200304_2_MP7624_S2_R1_001_val_1': 2981878,
  '20200304_30_MP7624_S30_R1_001_val_1': 640370,
  '20200304_31_MP7624_S31_R1_001_val_1': 1344701,
  '20200304_32_MP7624_S32_R1_001_val_1': 1801212,
  '20200304_33_MP7624_S33_R1_001_val_1': 1611780,
  '20200304_34_MP7624_S34_R1_001_val_1': 680349,
  '20200304_35_MP7624_S35_R1_001_val_1': 857970,
  '20200304_36_MP7624_S36_R1_001_val_1': 1105134,
  '20200304_37_MP7624_S37_R1_001_val_1': 716491})
In [60]:
for f in h.grouped(fastqs, 2):
    print(f[0])
res/20200304_38_MP7624_S38_R1_001_val_1.fq.gz
res/20200304_39_MP7624_S39_R1_001_val_1.fq.gz
res/20200304_3_MP7624_S3_R1_001_val_1.fq.gz
res/20200304_40_MP7624_S40_R1_001_val_1.fq.gz
res/20200304_41_MP7624_S41_R1_001_val_1.fq.gz
res/20200304_42_MP7624_S42_R1_001_val_1.fq.gz
res/20200304_43_MP7624_S43_R1_001_val_1.fq.gz
res/20200304_44_MP7624_S44_R1_001_val_1.fq.gz
res/20200304_45_MP7624_S45_R1_001_val_1.fq.gz
res/20200304_46_MP7624_S46_R1_001_val_1.fq.gz
res/20200304_47_MP7624_S47_R1_001_val_1.fq.gz
res/20200304_48_MP7624_S48_R1_001_val_1.fq.gz
res/20200304_49_MP7624_S49_R1_001_val_1.fq.gz
res/20200304_4_MP7624_S4_R1_001_val_1.fq.gz
res/20200304_50_MP7624_S50_R1_001_val_1.fq.gz
res/20200304_51_MP7624_S51_R1_001_val_1.fq.gz
res/20200304_52_MP7624_S52_R1_001_val_1.fq.gz
res/20200304_53_MP7624_S53_R1_001_val_1.fq.gz
res/20200304_54_MP7624_S54_R1_001_val_1.fq.gz
res/20200304_55_MP7624_S55_R1_001_val_1.fq.gz
res/20200304_56_MP7624_S56_R1_001_val_1.fq.gz
res/20200304_57_MP7624_S57_R1_001_val_1.fq.gz
res/20200304_58_MP7624_S58_R1_001_val_1.fq.gz
res/20200304_59_MP7624_S59_R1_001_val_1.fq.gz
res/20200304_5_MP7624_S5_R1_001_val_1.fq.gz
res/20200304_60_MP7624_S60_R1_001_val_1.fq.gz
res/20200304_61_MP7624_S61_R1_001_val_1.fq.gz
res/20200304_62_MP7624_S62_R1_001_val_1.fq.gz
res/20200304_63_MP7624_S63_R1_001_val_1.fq.gz
res/20200304_64_MP7624_S64_R1_001_val_1.fq.gz
res/20200304_65_MP7624_S65_R1_001_val_1.fq.gz
res/20200304_66_MP7624_S66_R1_001_val_1.fq.gz
res/20200304_67_MP7624_S67_R1_001_val_1.fq.gz
res/20200304_68_MP7624_S68_R1_001_val_1.fq.gz
res/20200304_69_MP7624_S69_R1_001_val_1.fq.gz
res/20200304_6_MP7624_S6_R1_001_val_1.fq.gz
res/20200304_7_MP7624_S7_R1_001_val_1.fq.gz
res/20200304_8_MP7624_S8_R1_001_val_1.fq.gz
res/20200304_9_MP7624_S9_R1_001_val_1.fq.gz

Analysis

In [7]:
file
Out[7]:
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
In [8]:
rsem_genes_expected_count = pd.read_csv(file[:-3], sep='\t')
In [147]:
rsem_genes_expected_count = pd.read_csv("../../data/RNPv2/MAX_AML_RNPv2.rsem_genes_expected_count.txt", sep='\t')
In [250]:
data = rsem_genes_expected_count.drop("transcript_id(s)",1)
In [251]:
data["gene_id"] = h.convertGenes(data['gene_id'])[0]
you need access to taiga for this (https://pypi.org/project/taigapy/)
20702 could not be parsed... we don't have all genes already
In [252]:
data=data.set_index('gene_id')
In [253]:
data
Out[253]:
1 10 11 12 13 14 15 16 17 18 ... 67 68 69 7 70 71 72 73 8 9
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
TNMD 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 3.00 5.00 8.00 2.00 2.00 1.00 2.00 1.00 3.00 3.00 ... 1.00 1.00 5.00 1.00 6.0 3.00 3.00 4.00 2.00 4.00
ERCC-00165 215.00 594.00 424.00 509.00 136.00 88.00 165.00 258.00 161.00 163.00 ... 93.00 139.00 87.00 127.00 628.0 207.00 151.00 241.00 187.00 176.00
ERCC-00168 3.00 12.00 9.00 8.00 0.00 8.00 0.00 5.00 5.00 1.00 ... 3.00 4.00 1.00 3.00 8.0 5.00 4.00 7.00 8.00 3.00
ERCC-00170 66.00 205.00 133.00 211.00 57.00 40.00 73.00 94.00 42.00 40.00 ... 41.00 56.00 33.00 50.00 141.0 72.00 92.00 110.00 89.00 88.00
ERCC-00171 13554.00 40900.00 29090.00 33242.00 10039.00 6399.00 10836.00 15684.00 9526.00 8893.00 ... 7058.00 7576.00 5882.00 8381.00 47913.0 12046.00 10447.00 17316.00 10492.00 12389.00

58813 rows × 73 columns

In [254]:
rename = {"1": "mr120-MV411-RNP_IRF2BP2-r4",
"2": "mr121-MV411-RNP_IRF2BP2-r5",
"3": "mr122-MV411-RNP_IRF2BP2-r6",
"4": "mr123-MV411-RNP_IRF8-r4",
"5": "mr124-MV411-RNP_IRF8-r5",
"6": "mr125-MV411-RNP_IRF8-r6",
"7": "mr126-MV411-RNP_MEF2D-r4",
"8": "mr127-MV411-RNP_MEF2D-r5",
"9": "mr128-MV411-RNP_MEF2D-r6",
"10": "mr129-MV411-RNP_MYC-r4",
"11": "mr130-MV411-RNP_MYC-r5",
"12": "mr131-MV411-RNP_MYC-r6",
"13": "mr132-MV411-RNP_RUNX1-r4",
"14": "mr133-MV411-RNP_RUNX1-r5",
"15": "mr134-MV411-RNP_RUNX1-r6",
"16": "mr135-MV411-RNP_RUNX2-r4",
"17": "mr136-MV411-RNP_RUNX2-r5",
"18": "mr137-MV411-RNP_RUNX2-r6",
"19": "mr138-MV411-RNP_SPI1-r4",
"20": "mr139-MV411-RNP_SPI1-r5",
"21": "mr140-MV411-RNP_SPI1-r6",
"22": "mr141-MV411-RNP_ZMYND8-r4",
"23": "mr142-MV411-RNP_ZMYND8-r5",
"24": "mr143-MV411-RNP_ZMYND8-r6",
"25": "mr144-MV411-RNP_LMO2-r4",
"26": "mr145-MV411-RNP_LMO2-r5",
"27": "mr146-MV411-RNP_LMO2-r6",
"28": "mr147-MV411-RNP_LYL1-r4",
"29": "mr148-MV411-RNP_LYL1-r5",
"30": "mr149-MV411-RNP_LYL1-r6",
"31": "mr150-MV411-RNP_MAX-r4",
"32": "mr151-MV411-RNP_MAX-r5",
"33": "mr152-MV411-RNP_MAX-r6",
"34": "mr153-MV411-RNP_ZEB2-r4",
"35": "mr154-MV411-RNP_ZEB2-r5",
"36": "mr155-MV411-RNP_ZEB2-r6",
"37": "mr156-MV411-RNP_MEF2C-r4",
"38": "mr157-MV411-RNP_MEF2C-r5",
"39": "mr158-MV411-RNP_MEF2C-r6",
"40": "mr159-MV411-RNP_MEIS1-r4",
"41": "mr160-MV411-RNP_MEIS1-r5",
"42": "mr161-MV411-RNP_MEIS1-r6",
"43": "mr162-MV411-RNP_FLI1-r4",
"44": "mr163-MV411-RNP_FLI1-r5",
"45": "mr164-MV411-RNP_FLI1-r6",
"46": "mr165-MV411-RNP_ELF2-r4",
"47": "mr166-MV411-RNP_ELF2-r5",
"48": "mr167-MV411-RNP_ELF2-r6",
"49": "mr168-MV411-RNP_GFI1-r4",
"50": "mr169-MV411-RNP_GFI1-r5",
"51": "mr170-MV411-RNP_GFI1-r6",
"52": "mr171-MV411-RNP_IKZF1-r4",
"53": "mr172-MV411-RNP_IKZF1-r5",
"54": "mr173-MV411-RNP_IKZF1-r6",
"55": "mr174-MV411-RNP_CEBPA-r4",
"56": "mr175-MV411-RNP_CEBPA-r5",
"57": "mr176-MV411-RNP_CEBPA-r6",
"58": "mr177-MV411-RNP_MYB-r4",
"59": "mr178-MV411-RNP_MYB-r5",
"60": "mr179-MV411-RNP_MYB-r6",
"61": "mr180-MV411-RNP_MYBL2-r1",
"62": "mr181-MV411-RNP_MYBL2-r2",
"63": "mr182-MV411-RNP_MYBL2-r3",
"64": "mr183-MV411-RNP_HOXA9-r4",
"65": "mr184-MV411-RNP_HOXA9-r5",
"66": "mr185-MV411-RNP_HOXA9-r6",
"67": "mr186-MV411-RNP_AAVS1-r1",
"68": "mr187-MV411-RNP_AAVS1-r2",
"69": "mr188-MV411-RNP_AAVS1-r3",
"70": "mr189-MV411-RNP_SP1-r4",
"71": "mr190-MV411-RNP_SP1-r5",
"72": "mr191-MV411-RNP_SP1-r6",
"73": "mr192-MV411-RNP_SP1-r7"}
In [255]:
data.columns
Out[255]:
Index(['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2',
       '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30',
       '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41',
       '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52',
       '53', '54', '55', '56', '57', '58', '59', '6', '60', '61', '62', '63',
       '64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '8',
       '9'],
      dtype='object')
In [256]:
data.columns = [rename[i] for i in data.columns]
In [257]:
data
Out[257]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr189-MV411-RNP_SP1-r4 mr190-MV411-RNP_SP1-r5 mr191-MV411-RNP_SP1-r6 mr192-MV411-RNP_SP1-r7 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
TNMD 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 3.00 5.00 8.00 2.00 2.00 1.00 2.00 1.00 3.00 3.00 ... 1.00 1.00 5.00 1.00 6.0 3.00 3.00 4.00 2.00 4.00
ERCC-00165 215.00 594.00 424.00 509.00 136.00 88.00 165.00 258.00 161.00 163.00 ... 93.00 139.00 87.00 127.00 628.0 207.00 151.00 241.00 187.00 176.00
ERCC-00168 3.00 12.00 9.00 8.00 0.00 8.00 0.00 5.00 5.00 1.00 ... 3.00 4.00 1.00 3.00 8.0 5.00 4.00 7.00 8.00 3.00
ERCC-00170 66.00 205.00 133.00 211.00 57.00 40.00 73.00 94.00 42.00 40.00 ... 41.00 56.00 33.00 50.00 141.0 72.00 92.00 110.00 89.00 88.00
ERCC-00171 13554.00 40900.00 29090.00 33242.00 10039.00 6399.00 10836.00 15684.00 9526.00 8893.00 ... 7058.00 7576.00 5882.00 8381.00 47913.0 12046.00 10447.00 17316.00 10492.00 12389.00

58813 rows × 73 columns

pre processing

filter some more

In [258]:
toremove = np.argwhere(data.values.var(1)==0)
toremove.ravel()
Out[258]:
array([    1,    15,    24, ..., 58714, 58715, 58718])
In [259]:
toremove.shape
Out[259]:
(19991, 1)
In [260]:
data = data.drop(data.iloc[toremove.ravel()].index,0)
In [261]:
data.shape
Out[261]:
(38787, 73)
In [262]:
ERCC = data[~data.index.str.contains('ENSG00')]
In [263]:
data = data[~data.index.str.contains('ERCC-')]
In [264]:
ensg = data[data.index.str.contains('ENSG00')]
In [265]:
data = data[~data.index.str.contains('ENSG00')]

renormalize the data

In [266]:
len(ERCC)
Out[266]:
26672

Getting the Core TF information

In [34]:
ctf=pd.read_csv('../data/CTF.csv',header=None)[0].values.tolist()
ctf
Out[34]:
['MYC',
 'MYB',
 'SPI1',
 'RUNX1',
 'GSE1',
 'IRF2BP2',
 'FLI1',
 'ELF2',
 'ZEB2',
 'IKAROS',
 'GFI1',
 'LMO2',
 'CEBPA',
 'MEF2D',
 'MEF2C',
 'IRF8',
 'MEIS1',
 'RUNX2',
 'ETV6',
 'LDB1',
 'RUNX2',
 'SP1',
 'ZMYND8']
In [35]:
genenames = data.index
ctfpos = [i for i, val in enumerate(genenames) if val in ctf]
notctfpos = [i for i, val in enumerate(genenames) if val not in ctf]

We find a CTF not in the dataset

In [36]:
[val for val in ctf if val not in genenames]
Out[36]:
['IKAROS']
In [37]:
ctf.remove('IKAROS')

correlation filtering

Making and running the dashboard

In [43]:
%%R
library('erccdashboard')
R[write to console]: Loading required package: ggplot2

R[write to console]: Loading required package: gridExtra

R[write to console]: 
Attaching package: ‘gridExtra’


R[write to console]: The following object is masked from ‘package:Biobase’:

    combine


R[write to console]: The following object is masked from ‘package:BiocGenerics’:

    combine


In [267]:
ERCC = ERCC.astype(int)
In [268]:
ERCC['Feature'] = ERCC.index
In [269]:
ERCC
Out[269]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr189-MV411-RNP_SP1-r4 mr190-MV411-RNP_SP1-r5 mr191-MV411-RNP_SP1-r6 mr192-MV411-RNP_SP1-r7 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6 Feature
gene_id
TSPAN6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 TSPAN6
DPM1 1619 2465 1701 1535 1863 2093 2027 2202 2148 2235 ... 1840 1729 1983 1926 1846 1915 2633 2451 2378 DPM1
SCYL3 464 846 672 603 577 617 601 545 575 536 ... 460 437 542 572 507 580 713 670 576 SCYL3
C1orf112 780 1031 755 676 1232 1209 1309 1370 1245 1257 ... 1277 1032 1163 783 1088 1184 1572 1481 1332 C1orf112
FGR 1443 8556 6387 5955 2359 2615 2258 3340 3229 3466 ... 2401 2230 3680 2016 2285 2384 3106 4706 4308 FGR
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 3 5 8 2 2 1 2 1 3 3 ... 1 5 1 6 3 3 4 2 4 ERCC-00164
ERCC-00165 215 594 424 509 136 88 165 258 161 163 ... 139 87 127 628 207 151 241 187 176 ERCC-00165
ERCC-00168 3 12 9 8 0 8 0 5 5 1 ... 4 1 3 8 5 4 7 8 3 ERCC-00168
ERCC-00170 66 205 133 211 57 40 73 94 42 40 ... 56 33 50 141 72 92 110 89 88 ERCC-00170
ERCC-00171 13554 40900 29090 33242 10039 6399 10836 15684 9526 8893 ... 7576 5882 8381 47913 12046 10447 17316 10492 12389 ERCC-00171

26672 rows × 74 columns

In [271]:
experiments = list(set([i.split('-')[2] for i in ERCC.columns[:-1]]))
experiments.remove("RNP_AAVS1")
In [140]:
from rpy2.robjects.packages import importr
erccdashboard = importr('erccdashboard')
In [144]:
#TODO: compute the mass from concentration
###################################################
### code chunk number 3: defineInputData
###################################################
%R datType = "count" # "count" for RNA-Seq data, "array" for microarray data
%R isNorm = False # flag to indicate if input expression measures are already
               # normalized, default is FALSE 
%R filenameRoot = "RNPv2" # user defined filename prefix for results files
%R sample2Name = "AAAVS1" # name for sample 2 in the experiment
%R erccmix = "RatioPair" # name of ERCC mixture design, "RatioPair" is default
%R erccdilution = 1/100 # dilution factor used for Ambion spike-in mixtures
%R spikeVol = 1 # volume (in microliters) of diluted spike-in mixture added to 
             #   total RNA mass
%R totalRNAmass = 0.500 # mass (in micrograms) of total RNA 
%R choseFDR = 0.05 # user defined false discovery rate (FDR), default is 0.05
In [272]:
cols = list(ERCC.columns)
cols.sort()
for val in experiments:
    data = {}
    e=0
    data.update({
        'Feature':'Feature'
    })
    for i in cols:
        if val in i:
            e+=1
            data.update({i: val.split('_')[-1]+'_'+str(e)})
    data.update({
        'mr186-MV411-RNP_AAVS1-r1': 'AAAVS1_1',
        'mr187-MV411-RNP_AAVS1-r2': 'AAAVS1_2',
        'mr188-MV411-RNP_AAVS1-r3': 'AAAVS1_3'
    })
    a = ERCC[list(data.keys())].rename(columns=data)
    a.to_csv('../data/ERCC_estimation.csv', index=None)
    val = val.split('_')[-1]
    torm = 'RNPv2.'+val+'.AAAVS1.All.Pvals.csv'
    ! rm $torm 
    %R -i val print(val)
    %R print(sample2Name)
    %R a <- read.csv('../data/ERCC_estimation.csv')
    %R print(head(a))
    %R exDat = ''
    try:
        %R -i val exDat = runDashboard(datType=datType, isNorm = isNorm, exTable=a, filenameRoot=filenameRoot, sample1Name=val, sample2Name=sample2Name, erccmix=erccmix, erccdilution=erccdilution, spikeVol=spikeVol, totalRNAmass=totalRNAmass, choseFDR=choseFDR)
    except Warning:
        print("failed for "+val)
        continue
    except:
        print('worked for '+val)
    %R print(summary(exDat))
    %R grid.arrange(exDat$Figures$dynRangePlot)
    %R grid.arrange(exDat$Figures$rocPlot)
    %R grid.arrange(exDat$Figures$lodrERCCPlot)
    %R grid.arrange(exDat$Figures$maPlot)
rm: cannot remove 'RNPv2.SPI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "SPI1"
[1] "AAAVS1"
   Feature SPI1_1 SPI1_2 SPI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      1        0        0        0
2     DPM1   2415   1729   2302     1620     1840     1729
3    SCYL3    798    648    744      430      460      437
4 C1orf112   1054    742   1104      949     1277     1032
5      FGR   2369   1766   2458     2323     2401     2230
6      CFH     44     22     58        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.SPI1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17191 transcripts remain for  analysis.
A total of 11 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061
ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2758.5 2102.5 2723 1622 1888 1696
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
81 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00123 ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00033
ERCC-00028 ERCC-00085 ERCC-00014 ERCC-00170 ERCC-00144
ERCC-00019 ERCC-00062 ERCC-00095 ERCC-00131 ERCC-00092
ERCC-00116 ERCC-00108 ERCC-00136 ERCC-00004 ERCC-00130

GLM log(r_m) estimate:
-1.172822 

GLM log(r_m) estimate weighted s.e.:
0.9182462 

Number of ERCCs in Mix 1 dyn range:  81 

Number of ERCCs in Mix 2 dyn range:  81 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00012 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00134
ERCC-00137 ERCC-00138 ERCC-00017 ERCC-00041 ERCC-00073
ERCC-00081 ERCC-00156


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.922442 7.650882 7.909489 7.391415 7.543273 7.436028 
Disp = 0.00583 , BCV = 0.0763 
Disp = 0.00582 , BCV = 0.0763 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.09198248 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.598       22     23
 1:1.5 0.626       19     23
   1:2 0.665       20     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for SPI1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
rm: cannot remove 'RNPv2.RUNX2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "RUNX2"
[1] "AAAVS1"
   Feature RUNX2_1 RUNX2_2 RUNX2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    2202    2148    2235     1620     1840     1729
3    SCYL3     545     575     536      430      460      437
4 C1orf112    1370    1245    1257      949     1277     1032
5      FGR    3340    3229    3466     2323     2401     2230
6      CFH      16      12      14        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.RUNX2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17042 transcripts remain for  analysis.
A total of 20 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117
ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2369.75 2268 2240.75 1638.75 1908.5 1710.75
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
72 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00147 ERCC-00077 ERCC-00154 ERCC-00028 ERCC-00085
ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00059 ERCC-00163
ERCC-00019 ERCC-00062 ERCC-00095 ERCC-00078 ERCC-00071
ERCC-00079 ERCC-00131 ERCC-00165 ERCC-00092 ERCC-00076
ERCC-00112 ERCC-00022 ERCC-00111 ERCC-00116 ERCC-00108
ERCC-00043 ERCC-00145 ERCC-00136 ERCC-00003 ERCC-00004
ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
-0.06126088 

GLM log(r_m) estimate weighted s.e.:
0.8674952 

Number of ERCCs in Mix 1 dyn range:  72 

Number of ERCCs in Mix 2 dyn range:  72 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00073 ERCC-00097 ERCC-00134 ERCC-00104


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.77054 7.726654 7.714566 7.401689 7.554073 7.444687 
Disp = 0.00341 , BCV = 0.0584 
Disp = 0.00341 , BCV = 0.0584 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.00174422 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.591       19     23
 1:1.5 0.611       18     23
   1:2 0.680       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for RUNX2
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
rm: cannot remove 'RNPv2.GFI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "GFI1"
[1] "AAAVS1"
   Feature GFI1_1 GFI1_2 GFI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   3000    984   1798     1620     1840     1729
3    SCYL3    708    258    466      430      460      437
4 C1orf112   1813    586   1037      949     1277     1032
5      FGR   2396    788   1525     2323     2401     2230
6      CFH     42     18     35        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.GFI1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16711 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
3122 1018 1947 1690.5 1977 1757
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154
ERCC-00028 ERCC-00058 ERCC-00069 ERCC-00085 ERCC-00143
ERCC-00054 ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00157
ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062
ERCC-00095 ERCC-00084 ERCC-00078 ERCC-00071 ERCC-00131
ERCC-00079 ERCC-00165 ERCC-00035 ERCC-00092 ERCC-00044
ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108
ERCC-00111 ERCC-00043 ERCC-00009 ERCC-00136 ERCC-00145
ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113
ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.2828325 

GLM log(r_m) estimate weighted s.e.:
0.852983 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00097 ERCC-00120 ERCC-00137 ERCC-00158
ERCC-00164 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
8.046229 6.925595 7.574045 7.43278 7.589336 7.471363 
Disp = 0.0035 , BCV = 0.0591 
Disp = 0.0035 , BCV = 0.0591 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.01869133 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.507       18     23
 1:1.5 0.663       18     23
   1:2 0.516       18     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for GFI1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "IRF2BP2"
[1] "AAAVS1"
   Feature IRF2BP2_1 IRF2BP2_2 IRF2BP2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6         0         0         0        0        0        0
2     DPM1      1619      1938      2043     1620     1840     1729
3    SCYL3       464       545       564      430      460      437
4 C1orf112       780       776       908      949     1277     1032
5      FGR      1443      1587      1765     2323     2401     2230
6      CFH         3         5        15        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.IRF2BP2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16582 transcripts remain for  analysis.
A total of 13 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057
ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098
ERCC-00117 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1614.75 1750.75 2094 1704 1995 1776
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
79 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00123 ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00077
ERCC-00033 ERCC-00039 ERCC-00154 ERCC-00028 ERCC-00085
ERCC-00157 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00170
ERCC-00144 ERCC-00019 ERCC-00078 ERCC-00079 ERCC-00062
ERCC-00095 ERCC-00165 ERCC-00131 ERCC-00112 ERCC-00092
ERCC-00022 ERCC-00043 ERCC-00116 ERCC-00108 ERCC-00003
ERCC-00136 ERCC-00046 ERCC-00004 ERCC-00002 ERCC-00130

GLM log(r_m) estimate:
-1.12242 

GLM log(r_m) estimate weighted s.e.:
0.8872992 

Number of ERCCs in Mix 1 dyn range:  79 

Number of ERCCs in Mix 2 dyn range:  79 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00138 ERCC-00017 ERCC-00073 ERCC-00081
ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.386935 7.4678 7.646831 7.440734 7.598399 7.482119 
Disp = 0.00418 , BCV = 0.0647 
Disp = 0.00419 , BCV = 0.0647 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.05686497 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.524       21     23
 1:1.5 0.567       18     23
   1:2 0.530       20     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for IRF2BP2
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "MYC"
[1] "AAAVS1"
   Feature MYC_1 MYC_2 MYC_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0        0        0        0
2     DPM1  2465  1701  1535     1620     1840     1729
3    SCYL3   846   672   603      430      460      437
4 C1orf112  1031   755   676      949     1277     1032
5      FGR  8556  6387  5955     2323     2401     2230
6      CFH     5     1     2        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MYC.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17015 transcripts remain for  analysis.
A total of 11 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057
ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00098 ERCC-00117
ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2374 1836.5 1790.5 1643 1913.5 1714
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
81 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00013 ERCC-00077
ERCC-00058 ERCC-00069 ERCC-00033 ERCC-00039 ERCC-00143
ERCC-00154 ERCC-00028 ERCC-00054 ERCC-00085 ERCC-00160
ERCC-00148 ERCC-00157 ERCC-00014 ERCC-00059 ERCC-00163
ERCC-00170 ERCC-00144 ERCC-00019 ERCC-00099 ERCC-00084
ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00079 ERCC-00062
ERCC-00095 ERCC-00165 ERCC-00131 ERCC-00035 ERCC-00044
ERCC-00112 ERCC-00076 ERCC-00092 ERCC-00022 ERCC-00042
ERCC-00111 ERCC-00043 ERCC-00116 ERCC-00108 ERCC-00145
ERCC-00003 ERCC-00136 ERCC-00171 ERCC-00046 ERCC-00004
ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00002 ERCC-00130

GLM log(r_m) estimate:
-1.149172 

GLM log(r_m) estimate weighted s.e.:
0.8782513 

Number of ERCCs in Mix 1 dyn range:  81 

Number of ERCCs in Mix 2 dyn range:  81 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00017 ERCC-00073 ERCC-00081 ERCC-00086
ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137
ERCC-00138 ERCC-00156


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.772332 7.515617 7.49025 7.404279 7.556689 7.446585 
Disp = 0.00415 , BCV = 0.0645 
Disp = 0.00416 , BCV = 0.0645 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.1420395 
Threshold P-value is high for the chosen FDR of  0.05
The sample comparison indicates a large amount of 
 differential expression in the measured transcript 
 populations

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.543       22     23
 1:1.5 0.594       18     23
   1:2 0.562       21     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for MYC
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "LMO2"
[1] "AAAVS1"
   Feature LMO2_1 LMO2_2 LMO2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   1907   2199   2141     1620     1840     1729
3    SCYL3    561    592    644      430      460      437
4 C1orf112   1229   1188   1285      949     1277     1032
5      FGR   2777   3265   2969     2323     2401     2230
6      CFH     13      8     10        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.LMO2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16882 transcripts remain for  analysis.
A total of 20 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117
ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2221.75 2325 2312.5 1662 1942.5 1733
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
72 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028
ERCC-00085 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00144
ERCC-00160 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099
ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078
ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092
ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00042
ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136
ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046
ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.4273754 

GLM log(r_m) estimate weighted s.e.:
0.8636267 

Number of ERCCs in Mix 1 dyn range:  72 

Number of ERCCs in Mix 2 dyn range:  72 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00120 ERCC-00137 ERCC-00158 ERCC-00164 ERCC-00168
ERCC-00073 ERCC-00109


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.70605 7.751475 7.746084 7.415777 7.571731 7.457609 
Disp = 0.00329 , BCV = 0.0574 
Disp = 0.00329 , BCV = 0.0574 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.003377844 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.494       18     23
 1:1.5 0.472       18     23
   1:2 0.491       18     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for LMO2
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
rm: cannot remove 'RNPv2.IKZF1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IKZF1"
[1] "AAAVS1"
   Feature IKZF1_1 IKZF1_2 IKZF1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1299    1529    2015     1620     1840     1729
3    SCYL3     361     406     571      430      460      437
4 C1orf112     836     967    1213      949     1277     1032
5      FGR    2082    1867    3154     2323     2401     2230
6      CFH       4       6       5        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.IKZF1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16752 transcripts remain for  analysis.
A total of 22 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075
ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1576.25 1526 2348.25 1677 1966.25 1753
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
70 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058
ERCC-00085 ERCC-00039 ERCC-00054 ERCC-00170 ERCC-00144
ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00099
ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078
ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092
ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116
ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145
ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074
ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.4183636 

GLM log(r_m) estimate weighted s.e.:
0.8313124 

Number of ERCCs in Mix 1 dyn range:  70 

Number of ERCCs in Mix 2 dyn range:  70 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00031 ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00134
ERCC-00158 ERCC-00164 ERCC-00168


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.362804 7.330405 7.761426 7.424762 7.583883 7.469084 
Disp = 0.00644 , BCV = 0.0803 
Disp = 0.00645 , BCV = 0.0803 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.00549604 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.471       19     23
 1:1.5 0.454       18     23
   1:2 0.460       16     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for IKZF1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "MYBL2"
[1] "AAAVS1"
   Feature MYBL2_1 MYBL2_2 MYBL2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1881    3921    1347     1620     1840     1729
3    SCYL3     469    1039     389      430      460      437
4 C1orf112    1108    2192     863      949     1277     1032
5      FGR    2573    5804    2117     2323     2401     2230
6      CFH      18      18       8        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MYBL2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17053 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1865 3829 1543 1638 1906 1710
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028
ERCC-00085 ERCC-00039 ERCC-00170 ERCC-00144 ERCC-00160
ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062
ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00131 ERCC-00078
ERCC-00071 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044
ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108
ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003
ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074
ERCC-00130 ERCC-00096 ERCC-00002

GLM log(r_m) estimate:
0.620191 

GLM log(r_m) estimate weighted s.e.:
0.8558226 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00013 ERCC-00031 ERCC-00073 ERCC-00077 ERCC-00097
ERCC-00120 ERCC-00134 ERCC-00147 ERCC-00158 ERCC-00168


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.531016 8.250359 7.341484 7.401231 7.552762 7.444249 
Disp = 0.00431 , BCV = 0.0656 
Disp = 0.00431 , BCV = 0.0656 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.0009369989 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.536       19     23
 1:1.5 0.412       18     23
   1:2 0.495       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for MYBL2
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "MEIS1"
[1] "AAAVS1"
   Feature MEIS1_1 MEIS1_2 MEIS1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1916    2046    2726     1620     1840     1729
3    SCYL3     477     554     683      430      460      437
4 C1orf112    1121    1128    1408      949     1277     1032
5      FGR    1935    2193    2556     2323     2401     2230
6      CFH       7       3      12        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MEIS1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16907 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117
ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2115 2194 2639.5 1658 1938 1730
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00013 ERCC-00077 ERCC-00033
ERCC-00154 ERCC-00028 ERCC-00085 ERCC-00069 ERCC-00039
ERCC-00054 ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00157
ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00062
ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00131
ERCC-00071 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044
ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108
ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003
ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074
ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.4541636 

GLM log(r_m) estimate weighted s.e.:
0.8992704 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00097 ERCC-00164 ERCC-00168 ERCC-00073 ERCC-00109


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.65681 7.693482 7.878345 7.413367 7.569412 7.455877 
Disp = 0.00363 , BCV = 0.0603 
Disp = 0.00364 , BCV = 0.0603 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.00638019 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.429       18     23
 1:1.5 0.586       18     23
   1:2 0.605       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for MEIS1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "IRF8"
[1] "AAAVS1"
   Feature IRF8_1 IRF8_2 IRF8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   2211   2243   2269     1620     1840     1729
3    SCYL3    611    621    622      430      460      437
4 C1orf112   1390   1268   1244      949     1277     1032
5      FGR   3652   3917   4442     2323     2401     2230
6      CFH     16     17     15        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.IRF8.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16800 transcripts remain for  analysis.
A total of 18 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117
ERCC-00123 ERCC-00138 ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2385 2327 2453 1672.25 1957.5 1744.25
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
74 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154
ERCC-00028 ERCC-00058 ERCC-00039 ERCC-00085 ERCC-00160
ERCC-00170 ERCC-00144 ERCC-00157 ERCC-00019 ERCC-00059
ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084
ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079
ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112
ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043
ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004
ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.2633745 

GLM log(r_m) estimate weighted s.e.:
0.8414289 

Number of ERCCs in Mix 1 dyn range:  74 

Number of ERCCs in Mix 2 dyn range:  74 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00012 ERCC-00013 ERCC-00134 ERCC-00137 ERCC-00164
ERCC-00168 ERCC-00073 ERCC-00156


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.776954 7.752335 7.805067 7.421925 7.579423 7.46408 
Disp = 0.00426 , BCV = 0.0653 
Disp = 0.00427 , BCV = 0.0654 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.02567527 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.453       20     23
 1:1.5 0.533       19     23
   1:2 0.608       18     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for IRF8
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "ELF2"
[1] "AAAVS1"
   Feature ELF2_1 ELF2_2 ELF2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   2516   1913   1971     1620     1840     1729
3    SCYL3    640    486    584      430      460      437
4 C1orf112   1315   1056   1278      949     1277     1032
5      FGR   3206   2242   2711     2323     2401     2230
6      CFH      4      8      5        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.ELF2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16904 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2414.75 1863 2194 1658.5 1938.5 1731
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028
ERCC-00058 ERCC-00069 ERCC-00085 ERCC-00039 ERCC-00170
ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00059 ERCC-00163
ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162
ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165
ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022
ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136
ERCC-00145 ERCC-00003 ERCC-00171 ERCC-00004 ERCC-00046
ERCC-00113 ERCC-00074 ERCC-00096 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.4388019 

GLM log(r_m) estimate weighted s.e.:
0.8523992 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00073 ERCC-00120 ERCC-00123 ERCC-00164


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.789351 7.529943 7.693482 7.413669 7.56967 7.456455 
Disp = 0.00472 , BCV = 0.0687 
Disp = 0.00472 , BCV = 0.0687 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.0003092106 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.502       19     23
 1:1.5 0.529       18     23
   1:2 0.453       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for ELF2
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
rm: cannot remove 'RNPv2.SP1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "SP1"
[1] "AAAVS1"
   Feature SP1_1 SP1_2 SP1_3 SP1_4 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0     0        0        0        0
2     DPM1  1926  1846  1915  2633     1620     1840     1729
3    SCYL3   572   507   580   713      430      460      437
4 C1orf112   783  1088  1184  1572      949     1277     1032
5      FGR  2016  2285  2384  3106     2323     2401     2230
6      CFH    15    13    15    15        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.SP1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16944 transcripts remain for  analysis.
A total of 15 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086
ERCC-00098 ERCC-00117 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2097 1924 2233.5 2745 1653.25 1933 1726
Check for sample mRNA fraction differences(r_m)...
R[write to console]: Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent
Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-

Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent
Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-
   Length     Class      Mode 
        1 character character 
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
[1] "LYL1"
[1] "AAAVS1"
   Feature LYL1_1 LYL1_2 LYL1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   1954   1656   2061     1620     1840     1729
3    SCYL3    572    428    588      430      460      437
4 C1orf112   1241    952   1107      949     1277     1032
5      FGR   2786   2397   3052     2323     2401     2230
6      CFH      7     14     13        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.LYL1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16829 transcripts remain for  analysis.
A total of 20 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2302 1853 2252 1669 1951 1743
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
72 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154
ERCC-00028 ERCC-00085 ERCC-00039 ERCC-00054 ERCC-00170
ERCC-00160 ERCC-00144 ERCC-00157 ERCC-00019 ERCC-00059
ERCC-00163 ERCC-00051 ERCC-00099 ERCC-00062 ERCC-00095
ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131
ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076
ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111
ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00171
ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00096
ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.3354632 

GLM log(r_m) estimate weighted s.e.:
0.8471587 

Number of ERCCs in Mix 1 dyn range:  72 

Number of ERCCs in Mix 2 dyn range:  72 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00097 ERCC-00134 ERCC-00168 ERCC-00073 ERCC-00123


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.741534 7.524561 7.719574 7.41998 7.576097 7.463363 
Disp = 0.00315 , BCV = 0.0561 
Disp = 0.00315 , BCV = 0.0562 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.006084115 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.491       20     23
 1:1.5 0.503       18     23
   1:2 0.578       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for LYL1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "CEBPA"
[1] "AAAVS1"
   Feature CEBPA_1 CEBPA_2 CEBPA_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1418     547    1781     1620     1840     1729
3    SCYL3     459     177     589      430      460      437
4 C1orf112     908     426    1171      949     1277     1032
5      FGR    1659     648    1791     2323     2401     2230
6      CFH       7       1      10        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.CEBPA.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16595 transcripts remain for  analysis.
A total of 22 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075
ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00109 ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1763 743 2081.5 1704 1993.5 1775
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
70 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154 ERCC-00028
ERCC-00058 ERCC-00085 ERCC-00143 ERCC-00170 ERCC-00144
ERCC-00014 ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062
ERCC-00095 ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079
ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112
ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043
ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046
ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.2735703 

GLM log(r_m) estimate weighted s.e.:
0.9241978 

Number of ERCCs in Mix 1 dyn range:  70 

Number of ERCCs in Mix 2 dyn range:  70 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00067 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00123
ERCC-00147 ERCC-00158 ERCC-00164 ERCC-00168


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.474772 6.610696 7.640844 7.440734 7.597647 7.481556 
Disp = 0.00549 , BCV = 0.0741 
Disp = 0.00549 , BCV = 0.0741 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.05536443 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.610       19     23
 1:1.5 0.637       18     23
   1:2 0.592       16     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for CEBPA
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "ZEB2"
[1] "AAAVS1"
   Feature ZEB2_1 ZEB2_2 ZEB2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   2361   2261   1810     1620     1840     1729
3    SCYL3    531    527    481      430      460      437
4 C1orf112   1086   1059    945      949     1277     1032
5      FGR   2523   2566   2552     2323     2401     2230
6      CFH      1      1      0        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.ZEB2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16877 transcripts remain for  analysis.
A total of 19 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00138 ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2103 2164 2008 1663 1944 1734
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
73 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00154 ERCC-00028
ERCC-00058 ERCC-00039 ERCC-00143 ERCC-00085 ERCC-00054
ERCC-00160 ERCC-00170 ERCC-00144 ERCC-00014 ERCC-00019
ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00084
ERCC-00095 ERCC-00162 ERCC-00078 ERCC-00071 ERCC-00131
ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00044 ERCC-00076
ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111
ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004
ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.07284839 

GLM log(r_m) estimate weighted s.e.:
0.9009232 

Number of ERCCs in Mix 1 dyn range:  73 

Number of ERCCs in Mix 2 dyn range:  73 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00031 ERCC-00041 ERCC-00097 ERCC-00120 ERCC-00156
ERCC-00158 ERCC-00164 ERCC-00073 ERCC-00134 ERCC-00137


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.65112 7.679714 7.604894 7.416378 7.572503 7.458186 
Disp = 0.00416 , BCV = 0.0645 
Disp = 0.00417 , BCV = 0.0646 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.06343123 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.435       20     23
 1:1.5 0.559       18     23
   1:2 0.644       18     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for ZEB2
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "MEF2D"
[1] "AAAVS1"
   Feature MEF2D_1 MEF2D_2 MEF2D_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1983    2451    2378     1620     1840     1729
3    SCYL3     542     670     576      430      460      437
4 C1orf112    1163    1481    1332      949     1277     1032
5      FGR    3680    4706    4308     2323     2401     2230
6      CFH      17      12      14        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MEF2D.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17024 transcripts remain for  analysis.
A total of 17 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117 ERCC-00123
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2150.25 2742.25 2546 1642 1913 1713
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
75 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00013 ERCC-00077 ERCC-00033
ERCC-00154 ERCC-00028 ERCC-00069 ERCC-00039 ERCC-00085
ERCC-00054 ERCC-00160 ERCC-00170 ERCC-00157 ERCC-00144
ERCC-00014 ERCC-00019 ERCC-00059 ERCC-00163 ERCC-00099
ERCC-00062 ERCC-00095 ERCC-00084 ERCC-00162 ERCC-00078
ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00060
ERCC-00092 ERCC-00044 ERCC-00076 ERCC-00112 ERCC-00022
ERCC-00116 ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136
ERCC-00145 ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113
ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.1296358 

GLM log(r_m) estimate weighted s.e.:
0.8698226 

Number of ERCCs in Mix 1 dyn range:  75 

Number of ERCCs in Mix 2 dyn range:  75 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00134 ERCC-00073 ERCC-00104 ERCC-00137
ERCC-00138


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.673339 7.916534 7.842279 7.40367 7.556428 7.446001 
Disp = 0.0018 , BCV = 0.0424 
Disp = 0.00181 , BCV = 0.0425 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.03637135 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.485       19     23
 1:1.5 0.459       18     23
   1:2 0.562       19     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for MEF2D
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "ZMYND8"
[1] "AAAVS1"
   Feature ZMYND8_1 ZMYND8_2 ZMYND8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6        0        0        0        0        0        0
2     DPM1     2140     1697     1859     1620     1840     1729
3    SCYL3      608      551      661      430      460      437
4 C1orf112     1311     1123     1319      949     1277     1032
5      FGR     4209     3864     4504     2323     2401     2230
6      CFH        8        6        7        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.ZMYND8.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17092 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2342 2038.25 2372 1633 1900.25 1707
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154
ERCC-00028 ERCC-00039 ERCC-00085 ERCC-00160 ERCC-00170
ERCC-00144 ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00163
ERCC-00062 ERCC-00095 ERCC-00162 ERCC-00078 ERCC-00071
ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00076
ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108 ERCC-00111
ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003 ERCC-00004
ERCC-00046 ERCC-00113 ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.1990095 

GLM log(r_m) estimate weighted s.e.:
0.8644413 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00120 ERCC-00134 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.758761 7.619847 7.771489 7.398174 7.549741 7.442493 
Disp = 0.0041 , BCV = 0.064 
Disp = 0.0041 , BCV = 0.0641 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.009284189 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.495       19     23
 1:1.5 0.556       18     23
   1:2 0.595       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for ZMYND8
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "RUNX1"
[1] "AAAVS1"
   Feature RUNX1_1 RUNX1_2 RUNX1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1863    2093    2027     1620     1840     1729
3    SCYL3     577     617     601      430      460      437
4 C1orf112    1232    1209    1309      949     1277     1032
5      FGR    2359    2615    2258     2323     2401     2230
6      CFH       8       9       7        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.RUNX1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17128 transcripts remain for  analysis.
A total of 22 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2247.25 2328 2294.25 1629 1895 1703
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
70 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154
ERCC-00028 ERCC-00069 ERCC-00085 ERCC-00160 ERCC-00170
ERCC-00144 ERCC-00157 ERCC-00014 ERCC-00019 ERCC-00059
ERCC-00163 ERCC-00062 ERCC-00095 ERCC-00162 ERCC-00078
ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165 ERCC-00092
ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108
ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003
ERCC-00004 ERCC-00046 ERCC-00074 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.1358976 

GLM log(r_m) estimate weighted s.e.:
0.8532505 

Number of ERCCs in Mix 1 dyn range:  70 

Number of ERCCs in Mix 2 dyn range:  70 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00031 ERCC-00097 ERCC-00120 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.717463 7.752765 7.738161 7.395722 7.546974 7.440147 
Disp = 0.00256 , BCV = 0.0506 
Disp = 0.00256 , BCV = 0.0506 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.02473796 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.559       18     23
 1:1.5 0.539       18     23
   1:2 0.564       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for RUNX1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
rm: cannot remove 'RNPv2.FLI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "FLI1"
[1] "AAAVS1"
   Feature FLI1_1 FLI1_2 FLI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   1892   2087   2588     1620     1840     1729
3    SCYL3    450    555    668      430      460      437
4 C1orf112   1196   1338   1591      949     1277     1032
5      FGR   2480   2602   3360     2323     2401     2230
6      CFH      3      3      4        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.FLI1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16821 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2055 2218 2616 1669 1953 1743
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028
ERCC-00085 ERCC-00069 ERCC-00039 ERCC-00143 ERCC-00054
ERCC-00170 ERCC-00144 ERCC-00160 ERCC-00019 ERCC-00014
ERCC-00059 ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095
ERCC-00084 ERCC-00162 ERCC-00078 ERCC-00131 ERCC-00071
ERCC-00079 ERCC-00165 ERCC-00035 ERCC-00092 ERCC-00044
ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108
ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003
ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074
ERCC-00096 ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.487796 

GLM log(r_m) estimate weighted s.e.:
0.840473 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00013 ERCC-00097 ERCC-00120 ERCC-00134 ERCC-00164
ERCC-00073


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.628031 7.704361 7.869402 7.41998 7.577122 7.463363 
Disp = 0.00243 , BCV = 0.0493 
Disp = 0.00243 , BCV = 0.0493 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.002016927 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.464       19     23
 1:1.5 0.493       18     23
   1:2 0.491       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for FLI1
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "HOXA9"
[1] "AAAVS1"
   Feature HOXA9_1 HOXA9_2 HOXA9_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1842    2075    2081     1620     1840     1729
3    SCYL3     516     575     602      430      460      437
4 C1orf112    1174    1241    1190      949     1277     1032
5      FGR    2239    2364    2372     2323     2401     2230
6      CFH       4      10       8        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.HOXA9.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16777 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2114 2247 2145 1675 1962 1750
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00147 ERCC-00033 ERCC-00154 ERCC-00028 ERCC-00058
ERCC-00085 ERCC-00039 ERCC-00170 ERCC-00144 ERCC-00160
ERCC-00019 ERCC-00014 ERCC-00059 ERCC-00099 ERCC-00062
ERCC-00095 ERCC-00162 ERCC-00131 ERCC-00078 ERCC-00071
ERCC-00079 ERCC-00165 ERCC-00092 ERCC-00035 ERCC-00044
ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116 ERCC-00108
ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145 ERCC-00003
ERCC-00171 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074
ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.7498464 

GLM log(r_m) estimate weighted s.e.:
0.8841697 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00134
ERCC-00147 ERCC-00164


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.656337 7.717351 7.670895 7.423568 7.58172 7.467371 
Disp = 0.00275 , BCV = 0.0525 
Disp = 0.00276 , BCV = 0.0525 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.002819864 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.533       19     23
 1:1.5 0.614       18     23
   1:2 0.550       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for HOXA9
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
rm: cannot remove 'RNPv2.MYB.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYB"
[1] "AAAVS1"
   Feature MYB_1 MYB_2 MYB_3 MYB_4 MYB_5 MYB_6 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0     0     0     0        0        0        0
2     DPM1  1695  1557  1288  1881  3921  1347     1620     1840     1729
3    SCYL3   582   482   460   469  1039   389      430      460      437
4 C1orf112   831   825   776  1108  2192   863      949     1277     1032
5      FGR  3674  3220  2807  2573  5804  2117     2323     2401     2230
6      CFH    10    17    11    18    18     8        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MYB.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17270 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1880.75 1759.75 1505 1834.75 3750.25 1514 1613.75 1874.75 1688.75
Check for sample mRNA fraction differences(r_m)...
R[write to console]: Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent
Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-

Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent
Calls: <Anonymous> ... withVisible -> runDashboard -> est_r_m -> colnames<-
   Length     Class      Mode 
        1 character character 
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
R[write to console]: Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob

Error in exDat$Figures : $ operator is invalid for atomic vectors
Calls: <Anonymous> ... <Anonymous> -> withVisible -> grid.arrange -> arrangeGrob
[1] "MAX"
[1] "AAAVS1"
   Feature MAX_1 MAX_2 MAX_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0        0        0        0
2     DPM1  1811  2032  2172     1620     1840     1729
3    SCYL3   571   656   742      430      460      437
4 C1orf112  1215  1387  1393      949     1277     1032
5      FGR  3640  4163  4084     2323     2401     2230
6      CFH     9     5     3        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MAX.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16957 transcripts remain for  analysis.
A total of 15 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00117 ERCC-00138 ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2142 2502 2512 1651 1928 1725
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
77 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00123 ERCC-00097 ERCC-00134 ERCC-00147 ERCC-00077
ERCC-00058 ERCC-00033 ERCC-00154 ERCC-00069 ERCC-00028
ERCC-00039 ERCC-00143 ERCC-00085 ERCC-00054 ERCC-00160
ERCC-00157 ERCC-00014 ERCC-00059 ERCC-00170 ERCC-00163
ERCC-00144 ERCC-00019 ERCC-00099 ERCC-00084 ERCC-00162
ERCC-00078 ERCC-00062 ERCC-00095 ERCC-00071 ERCC-00079
ERCC-00131 ERCC-00165 ERCC-00044 ERCC-00076 ERCC-00112
ERCC-00092 ERCC-00022 ERCC-00111 ERCC-00043 ERCC-00116
ERCC-00108 ERCC-00145 ERCC-00003 ERCC-00136 ERCC-00171
ERCC-00046 ERCC-00004 ERCC-00113 ERCC-00074 ERCC-00096
ERCC-00002 ERCC-00130

GLM log(r_m) estimate:
-0.4981285 

GLM log(r_m) estimate weighted s.e.:
0.8587386 

Number of ERCCs in Mix 1 dyn range:  77 

Number of ERCCs in Mix 2 dyn range:  77 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00123 ERCC-00134 ERCC-00168 ERCC-00041 ERCC-00073
ERCC-00104 ERCC-00109 ERCC-00137 ERCC-00156


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.669495 7.824846 7.828835 7.409136 7.564238 7.452982 
Disp = 0.0026 , BCV = 0.051 
Disp = 0.0026 , BCV = 0.051 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.0321099 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.607       21     23
 1:1.5 0.635       18     23
   1:2 0.632       19     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for MAX
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
[1] "MEF2C"
[1] "AAAVS1"
   Feature MEF2C_1 MEF2C_2 MEF2C_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1877    1951    1803     1620     1840     1729
3    SCYL3     459     498     519      430      460      437
4 C1orf112    1127    1049    1138      949     1277     1032
5      FGR    2652    3037    2824     2323     2401     2230
6      CFH       3       7       5        6        5        9

Initializing the exDat list structure...
choseFDR = 0.05 
repNormFactor is NULL 
Filename root is: RNPv2.MEF2C.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16818 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1959.75 2084 2098.75 1669.75 1953.75 1743.75
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00097 ERCC-00147 ERCC-00077 ERCC-00033 ERCC-00154
ERCC-00028 ERCC-00058 ERCC-00039 ERCC-00054 ERCC-00170
ERCC-00160 ERCC-00144 ERCC-00019 ERCC-00014 ERCC-00059
ERCC-00163 ERCC-00099 ERCC-00062 ERCC-00095 ERCC-00162
ERCC-00078 ERCC-00071 ERCC-00131 ERCC-00079 ERCC-00165
ERCC-00092 ERCC-00076 ERCC-00112 ERCC-00022 ERCC-00116
ERCC-00108 ERCC-00111 ERCC-00043 ERCC-00136 ERCC-00145
ERCC-00003 ERCC-00004 ERCC-00046 ERCC-00113 ERCC-00074
ERCC-00130 ERCC-00002

GLM log(r_m) estimate:
0.3739488 

GLM log(r_m) estimate weighted s.e.:
0.8715334 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00013 ERCC-00097 ERCC-00123 ERCC-00164 ERCC-00168
ERCC-00073


Saving dynRangePlot to exDat

Starting differential expression tests

Show log.offset
7.580572 7.642044 7.649097 7.420429 7.577506 7.463793 
Disp = 0.00253 , BCV = 0.0503 
Disp = 0.00253 , BCV = 0.0503 
Finished DE testing
Finished examining dispersions

Threshold P-value
0.0009875534 

Generating ROC curve and AUC statistics...

Area Under the Curve (AUC) Results:
 Ratio   AUC Detected Spiked
   4:1 0.526       19     23
 1:1.5 0.592       18     23
   1:2 0.467       17     23

Estimating ERCC LODR
.............................................
 Ratio LODR Estimate 90% CI Lower Bound 90% CI Upper Bound
   4:1           Inf               <NA>               <NA>
 1:1.5           Inf               <NA>               <NA>
   1:2           Inf               <NA>               <NA>

Warning! Estimated distribution of p-values does not cross threshold p-value,
 may be due to insufficient data quantity
 Consider adjusting FDR choice.

LODR estimates are available to code ratio-abundance plot

Saving main dashboard plots to pdf file...

Saving exDat list to .RData file...
Analysis completed.
R[write to console]: Error in dev.off() : cannot shut down device 1 (the null device)
Calls: <Anonymous> -> <Anonymous> -> <Anonymous> -> dev.off

worked for MEF2C
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results       12     -none-     list     
Figures        7     -none-     list     
In [40]:
%matplotlib inline
ig, ax = plt.subplots(figsize=(10,10))
sns.heatmap(data.corr(), 
            xticklabels=data.columns,
            yticklabels=data.columns, ax=ax)
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f1f323aef10>
In [177]:
model = AgglomerativeClustering(n_clusters=15,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(data.corr())
ii = itertools.count(data.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
sort = labels.argsort()
In [41]:
%matplotlib inline

sns.clustermap(data.corr(), figsize=(20, 20))

plt.savefig('../data/RNPv2_cluster_count.pdf')
In [179]:
data.sum().tolist()
Out[179]:
[31194860.27000039,
 34734170.910000145,
 41947063.61999977,
 46794854.38000023,
 45959725.04999988,
 48187669.949999854,
 43703179.22999995,
 54815404.069999784,
 51453432.84000005,
 45694014.92000012,
 37739408.16000016,
 35925369.88000013,
 45939275.84999983,
 46049236.90999998,
 47474159.87999978,
 48525076.05999996,
 45690646.539999746,
 45157321.31999988,
 56639651.62999975,
 41764180.25999997,
 53047868.079999454,
 45963304.22999989,
 42284214.549999595,
 47507365.27999984,
 43762796.11999972,
 45382911.53999989,
 46972864.209999934,
 45345593.949999996,
 37246793.10999977,
 44768420.24999964,
 42046067.34999971,
 50800605.66999957,
 51176436.25999986,
 42939652.28999985,
 44136137.289999895,
 40740731.69999998,
 38508207.550000004,
 41500257.68999979,
 41227894.83000014,
 43337577.789999865,
 43352847.28999995,
 51316363.68999997,
 40072110.34000017,
 43282705.06999982,
 51083598.04999976,
 47140394.049999766,
 37620883.43999992,
 44039610.83999986,
 61484638.129999965,
 20045963.380000293,
 38556072.189999774,
 31634429.490000147,
 29835972.010000307,
 47235734.2699997,
 34097279.28000006,
 14896010.669999905,
 40029165.88999997,
 38726353.51999988,
 37015620.039999984,
 31655845.910000257,
 37291884.63999993,
 77020486.5900007,
 76035190.80000074,
 80821407.53000104,
 88932208.80000061,
 96200436.35000083,
 33570457.16000021,
 39525165.01000017,
 35056555.59000006]
In [120]:
data.shape
Out[120]:
(26580, 73)

DESEQ ANALYSIS

In [119]:
data
Out[119]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr189-MV411-RNP_SP1-r4 mr190-MV411-RNP_SP1-r5 mr191-MV411-RNP_SP1-r6 mr192-MV411-RNP_SP1-r7 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
FGR 1443.00 8556.00 6387.00 5955.00 2359.00 2615.00 2258.00 3340.00 3229.00 3466.00 ... 2323.00 2401.00 2230.00 3680.00 2016.0 2285.00 2384.00 3106.00 4706.00 4308.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
BMP8B-AS1 3.00 2.00 2.00 4.00 10.00 9.00 9.00 8.00 4.00 7.00 ... 6.00 5.00 4.00 3.00 3.0 6.00 7.00 10.00 3.00 7.00
H2AL1SP 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
NIPBL-DT 462.00 650.00 478.00 431.00 777.00 829.00 782.00 709.00 743.00 776.00 ... 497.00 653.00 673.00 889.00 673.0 628.00 871.00 962.00 1099.00 1024.00
CERNA2 2.00 7.00 8.00 3.00 13.00 6.00 24.00 9.00 8.00 12.00 ... 4.00 10.00 10.00 3.00 0.0 18.00 28.00 28.00 1.00 7.06
LINC02689 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00

26580 rows × 73 columns

In [121]:
experiments = list(set([i.split('-')[2] for i in data.columns[:-1]]))
In [122]:
experiments.remove("RNP_AAVS1")
In [126]:
data['gene_id'] = data.index
In [123]:
experiments
Out[123]:
['RNP_SPI1',
 'RNP_RUNX2',
 'RNP_GFI1',
 'RNP_IRF2BP2',
 'RNP_MYC',
 'RNP_LMO2',
 'RNP_IKZF1',
 'RNP_MYBL2',
 'RNP_MEIS1',
 'RNP_IRF8',
 'RNP_ELF2',
 'RNP_SP1',
 'RNP_LYL1',
 'RNP_CEBPA',
 'RNP_ZEB2',
 'RNP_MEF2D',
 'RNP_ZMYND8',
 'RNP_RUNX1',
 'RNP_FLI1',
 'RNP_HOXA9',
 'RNP_MYB',
 'RNP_MAX',
 'RNP_MEF2C']
In [127]:
for val in experiments:  
    design = pd.DataFrame(index=data.columns[:-1], columns=['DMSO','Target'], 
                          data=np.array([[1 if 'RNP_AAVS1' in i else 0 for i in data.columns[:-1]],[1 if val in i else 0 for i in data.columns[:-1]]]).T)
    design.index = design.index.astype(str).str.replace('-','.')
    deseq = pyDESeq2.pyDESeq2(count_matrix=data, design_matrix = design, 
                              design_formula='~DMSO + Target', gene_column="gene_id")
    deseq.run_deseq()
    deseq.get_deseq_result()
    r = deseq.deseq_result
    r.pvalue = np.nan_to_num(np.array(r.pvalue), 1)
    r.log2FoldChange = np.nan_to_num(np.array(r.log2FoldChange), 0)
    results[val] = r
3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 211 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 209 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 203 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 208 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 213 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 205 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 213 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 208 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 209 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 208 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 211 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 353 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 209 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 210 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 155 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 209 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 202 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 208 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 209 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 207 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 152 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 208 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

3.2.6
R[write to console]: estimating size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 205 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

CTF (volcano)

In [ ]:
results
In [128]:
for val in experiments:
    a = h.volcano(results[val],tohighlight=ctf,title=val, maxvalue= 60, searchbox=True, minlogfold=0.5)
    try:
        show(a)
    except RuntimeError:
        show(a)

any bias in the data

In [126]:
datad = data
In [127]:
data = data.drop(columns='mr129-MV411-RNP_MYC-r4')
In [129]:
col = {v:i for i, v in enumerate(set([i.split('-')[2] for i in data.columns[:-1]]))}
In [130]:
red = PCA(2).fit_transform(data[data.columns[:-1]].T)
h.scatter(red, labels=data.columns[:-1], radi=60000, colors=[col[i.split('-')[2]] for i in data.columns[:-1]])
Out[130]:
Figure(
id = '7305', …)
In [131]:
red = PCA(30).fit_transform(data[data.columns[:-1]].T)
red = TSNE(2,4).fit_transform(red)
In [117]:
red.shape
Out[117]:
(68, 2)

mr129-MYC-r4 seems weird

In [132]:
h.scatter(red, labels=data.columns[:-1], radi=10, colors=[col[i.split('-')[2]] for i in data.columns[:-1]])
Out[132]:
Figure(
id = '7708', …)
In [133]:
pca = PCA(20)
red = pca.fit_transform(data.T)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-133-3ada5fe15c08> in <module>
      1 pca = PCA(20)
----> 2 red = pca.fit_transform(data.T)

~/.local/lib/python3.7/site-packages/sklearn/decomposition/_pca.py in fit_transform(self, X, y)
    367         C-ordered array, use 'np.ascontiguousarray'.
    368         """
--> 369         U, S, V = self._fit(X)
    370         U = U[:, :self.n_components_]
    371 

~/.local/lib/python3.7/site-packages/sklearn/decomposition/_pca.py in _fit(self, X)
    389 
    390         X = check_array(X, dtype=[np.float64, np.float32], ensure_2d=True,
--> 391                         copy=self.copy)
    392 
    393         # Handle n_components==None

~/.local/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    529                     array = array.astype(dtype, casting="unsafe", copy=False)
    530                 else:
--> 531                     array = np.asarray(array, order=order, dtype=dtype)
    532             except ComplexWarning:
    533                 raise ValueError("Complex data not supported\n"

~/.local/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
     83 
     84     """
---> 85     return array(a, dtype, copy=False, order=order)
     86 
     87 

ValueError: could not convert string to float: 'TSPAN6'
In [ ]:
pca.explained_variance_ratio_

GSEA analysis

In [120]:
data
Out[120]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr182-MV411-RNP_MYBL2-r3 mr183-MV411-RNP_HOXA9-r4 mr184-MV411-RNP_HOXA9-r5 mr185-MV411-RNP_HOXA9-r6 mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.00
1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 3272.00 3686.00 3990.00 4714.0 1620.00 1840.00 1729.00 1983.00 2451.00 2378.00
2 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 961.52 1024.20 1155.40 1316.6 430.78 460.04 437.36 542.42 670.02 576.38
3 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 1647.50 2260.80 2422.60 2757.4 949.22 1277.00 1032.60 1163.60 1481.00 1332.90
4 1443.00 8556.00 6387.00 5955.00 2359.00 2615.00 2258.00 3340.00 3229.00 3466.00 ... 4120.00 4514.00 4748.00 5478.0 2323.00 2401.00 2230.00 3680.00 4706.00 4308.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
38682 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.00
38683 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.00
38684 8.76 23.59 28.71 8.43 8.34 23.57 35.35 9.34 30.93 36.25 ... 8.43 5.75 30.07 22.7 9.09 9.78 9.26 31.53 16.19 5.61
38685 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.00
38686 1.00 2.00 4.00 2.00 5.00 3.00 3.00 5.00 1.00 0.00 ... 2.00 2.00 3.00 4.0 0.00 0.00 1.00 0.00 1.00 4.00

38687 rows × 69 columns

In [167]:
res = {}
In [133]:
data = datad
In [150]:
totest
Out[150]:
mr123-MV411-RNP_IRF8-r4 mr124-MV411-RNP_IRF8-r5 mr125-MV411-RNP_IRF8-r6 mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3
0 0.00 0.00 0.00 0.00 0.00 0.00
1 2211.00 2243.00 2269.00 1620.00 1840.00 1729.00
2 611.42 621.91 622.93 430.78 460.04 437.36
3 1390.60 1268.10 1244.10 949.22 1277.00 1032.60
4 3652.00 3917.00 4442.00 2323.00 2401.00 2230.00
... ... ... ... ... ... ...
38774 2.00 4.00 0.00 1.00 1.00 5.00
38775 165.00 119.00 130.00 93.00 139.00 87.00
38776 2.00 4.00 0.00 3.00 4.00 1.00
38777 51.00 52.00 31.00 41.00 56.00 33.00
38778 8976.00 7816.00 9319.00 7058.00 7576.00 5882.00

38779 rows × 6 columns

In [152]:
data = data.set_index('gene_id',drop=True)
In [168]:
res[val]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-168-edbe59d9522e> in <module>
----> 1 res[val]

KeyError: 'RNP_IRF8'
In [169]:
for val in experiments:
    print(val)
    totest = data[[v for v in data.columns[:-1] if val in v or 'AAVS1' in v]]
    cls = ['Condition' if val in v else 'DMSO' for v in totest.columns]
    res[val] = gseapy.gsea(data=totest, gene_sets='WikiPathways_2013', 
                cls= cls, no_plot=False, processes=10)
    res[val].res2d['Term'] = [i for i in res[val].res2d.index]
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_IRF8
RNP_SPI1
RNP_FLI1
RNP_CEBPA
RNP_MYC
RNP_MYB
RNP_ELF2
RNP_GFI1
RNP_RUNX1
RNP_IRF2BP2
RNP_MEF2D
RNP_IKZF1
RNP_MEF2C
RNP_LMO2
RNP_MYBL2
RNP_MAX
RNP_ZMYND8
RNP_LYL1
RNP_HOXA9
RNP_RUNX2
RNP_ZEB2
RNP_MEIS1
In [170]:
with open('../data/wikipathway_RNPv2', 'wb') as f:
    pickle.dump(res,f)
In [158]:
with open('../data/wikipathway_RNPv2','rb') as f:
    res = pickle.load(f)
In [172]:
for i, val in enumerate(experiments):
    plt.figure(i)
    res[val].res2d.Term = [i[2:-13] for i in res[val].res2d.index]
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
/home/jeremie/.local/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  
In [174]:
a = set()
for k, val in res.items():
    a.update(set(val.res2d.index))
a = {i:[0]*len(res) for i in a}
for n,(k, val) in enumerate(res.items()):
    for i,v in val.res2d.iterrows():
        a[i][n] = v.es
res = pd.DataFrame(a, index=res.keys())
fig, ax = plt.subplots(figsize=(20,15))
sns.heatmap(ax=ax,data=res)
Out[174]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2cdbb0590>
In [175]:
model = AgglomerativeClustering(n_clusters=6,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(res)
ii = itertools.count(res.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
sort = labels.argsort()
a = plotCorrelationMatrix(res.values[sort],res.index[sort].tolist(),interactive=True)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-175-18b55e342942> in <module>
      5 tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
      6 sort = labels.argsort()
----> 7 a = plotCorrelationMatrix(res.values[sort],res.index[sort].tolist(),interactive=True)

NameError: name 'plotCorrelationMatrix' is not defined
In [176]:
fig, ax = plt.subplots(figsize=(20,15))
sns.heatmap(ax=ax,data=res)
Out[176]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2cc92bc10>
In [ ]:
fig.savefig("enriched_terms.png")
In [ ]:
show(a)
In [ ]:
fi
In [ ]:
experiments
In [ ]:
data
In [179]:
res = {}
In [181]:
for i, val in enumerate(experiments):
    print(val)
    totest = data[[v for v in data.columns[:-1] if val in v or 'AAVS1' in v]]
    cls = ['Condition' if val in v else 'DMSO' for v in totest.columns]
    res[val] = gseapy.gsea(data=totest, gene_sets='GO_Biological_Process_2015', 
                cls= cls, no_plot=False, processes=14)
    res[val].res2d['Term'] = [i for i in res[val].res2d.index]
    plt.figure(i)
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_IRF8
RNP_SPI1
RNP_FLI1
RNP_CEBPA
RNP_MYC
RNP_MYB
RNP_ELF2
RNP_GFI1
RNP_RUNX1
RNP_IRF2BP2
RNP_MEF2D
RNP_IKZF1
RNP_MEF2C
RNP_LMO2
RNP_MYBL2
RNP_MAX
RNP_ZMYND8
RNP_LYL1
RNP_HOXA9
RNP_RUNX2
RNP_ZEB2
/home/jeremie/.local/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  
RNP_MEIS1
In [182]:
with open('../data/GO_Biological_Process_2015_RNPv2', 'wb') as f:
    pickle.dump(res,f)
In [ ]:
with open('GO_Biological_Process_2015','rb') as f:
    res = pickle.load(f)

creating matrices

In [183]:
a = set()
for k, val in res.items():
    a.update(set(val.res2d.Term))
a = {i:[0]*len(res) for i in a}
for n,(k, val) in enumerate(res.items()):
    for i,v in val.res2d.iterrows():
        a[v.Term][n] = v.es
res = pd.DataFrame(a, index=res.keys())
fig, ax = plt.subplots(figsize=(20,15))
sns.heatmap(ax=ax,data=res)
Out[183]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2674cd8d0>
In [205]:
model = AgglomerativeClustering(n_clusters=8,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(res)
ii = itertools.count(res.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
In [206]:
sort = labels.argsort()
In [207]:
a = h.plotCorrelationMatrix(res.values[sort],res.index[sort].tolist(),interactive=True,title="RNP2_bioproc_corr")
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 484), ('colors', 484), ('data', 22), ('xname', 484), ('yname', 484)
/home/jeremie/.local/lib/python3.7/site-packages/bokeh/io/saving.py:126: UserWarning: save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN
  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
/home/jeremie/.local/lib/python3.7/site-packages/bokeh/io/saving.py:139: UserWarning: save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-207-d342427fa33f> in <module>
----> 1 a = h.plotCorrelationMatrix(res.values[sort],res.index[sort].tolist(),interactive=True,title="RNP2_bioproc_corr")

~/JKBio/Helper.py in plotCorrelationMatrix(data, names, colors, title, dataIsCorr, invert, size, interactive, rangeto)
    390     except:
    391       show(p)
--> 392     save(p, title + '.html')
    393 
    394     return p  # show the plot

~/.local/lib/python3.7/site-packages/bokeh/io/saving.py in save(obj, filename, resources, title, template, state, **kwargs)
     84 
     85     filename, resources, title = _get_save_args(state, filename, resources, title)
---> 86     _save_helper(obj, filename, resources, title, template)
     87     return abspath(filename)
     88 

~/.local/lib/python3.7/site-packages/bokeh/io/saving.py in _save_helper(obj, filename, resources, title, template)
    146     '''
    147     from ..embed import file_html
--> 148     html = file_html(obj, resources, title=title, template=template)
    149 
    150     with io.open(filename, mode="w", encoding="utf-8") as f:

~/.local/lib/python3.7/site-packages/bokeh/embed/standalone.py in file_html(models, resources, title, template, template_variables, theme, suppress_callback_warning, _always_new)
    288         models = models.roots
    289 
--> 290     with OutputDocumentFor(models, apply_theme=theme, always_new=_always_new) as doc:
    291         (docs_json, render_items) = standalone_docs_json_and_render_items(models, suppress_callback_warning=suppress_callback_warning)
    292         title = _title_from_models(models, title)

/usr/lib/python3.7/contextlib.py in __enter__(self)
    110         del self.args, self.kwds, self.func
    111         try:
--> 112             return next(self.gen)
    113         except StopIteration:
    114             raise RuntimeError("generator didn't yield") from None

~/.local/lib/python3.7/site-packages/bokeh/embed/util.py in OutputDocumentFor(objs, apply_theme, always_new)
    136             doc = Document()
    137             for model in objs:
--> 138                 doc.add_root(model)
    139 
    140         # handle a single shared document

~/.local/lib/python3.7/site-packages/bokeh/document/document.py in add_root(self, model, setter)
    302             self._roots.append(model)
    303         finally:
--> 304             self._pop_all_models_freeze()
    305         self._trigger_on_change(RootAddedEvent(self, model, setter))
    306 

~/.local/lib/python3.7/site-packages/bokeh/document/document.py in _pop_all_models_freeze(self)
   1017         self._all_models_freeze_count -= 1
   1018         if self._all_models_freeze_count == 0:
-> 1019             self._recompute_all_models()
   1020 
   1021     def _recompute_all_models(self):

~/.local/lib/python3.7/site-packages/bokeh/document/document.py in _recompute_all_models(self)
   1040             d._detach_document()
   1041         for a in to_attach:
-> 1042             a._attach_document(self)
   1043         self._all_models = recomputed
   1044         self._all_models_by_name = recomputed_by_name

~/.local/lib/python3.7/site-packages/bokeh/model.py in _attach_document(self, doc)
    725         '''
    726         if self._document is not None and self._document is not doc:
--> 727             raise RuntimeError("Models must be owned by only a single document, %r is already in a doc" % (self))
    728         doc.theme.apply_to_model(self)
    729         self._document = doc

RuntimeError: Models must be owned by only a single document, Rect(id='43474', ...) is already in a doc
In [ ]:
cluster1= ['LMO2','LYL1','MAX','MEF2C']
cluster2=['GFI1','FLI1','MYB','IKZF1','ELF2','CEBPa','MEIS1']
cluster3=['IRF2BP2','MEF2C','CDK6','MEF2D','IRF8','BRD4','MYC']
cluster4= ['RUNX1','RUNX2','ZMYND8']
In [ ]:
res.loc[cluster2].mean().sort_values()
In [ ]:
'GO_Molecular_Function_2015',
'GeneSigDB',
'ENCODE_TF_ChIP-seq_2014',
#'Drug_Perturbations_from_GEO_2014',
'GO_Cellular_Component_2015',
'GO_Biological_Process_2015',
'PPI_Hub_Proteins',
'WikiPathways_2013',
'TF-LOF_Expression_from_GEO',
# msig db C2 C6 H http://software.broadinstitute.org/gsea/msigdb/annotate.jsp
# max's crc

compare to the drop of CTF

In [ ]:
ctf = [
 'BRD4',
 'CDK6',
 'CEBPA',
 'ELF2',
 'FLI1',
 'GFI1',
 'IKZF1',
 'IRF2BP2',
 'IRF8',
 'LMO2',
 'LYL1',
 'MAX',
 'MEF2C',
 'MEF2D',
 'MEIS1',
 'MYB',
 'MYC',
 'RUNX1',
 'RUNX2',
 'SPI1',
 'ZEB2',
 'ZMYND8'
]
In [ ]:
deseq = pd.DataFrame()
for k, val in results.items():
    deseq[k] = val.log2FoldChange
deseq=deseq.T
In [ ]:
deseq
In [ ]:
a = plotCorrelationMatrix(a, deseq.index[sort].tolist(),interactive=True)
In [ ]:
ctf[11] = 'CEBPa'
ctf[]
In [ ]:
ctf

dropping ETV6 SP1 GSE1 LDB1

In [ ]:
deseq.loc[['MYC',
 'MYB',
 'SPI1',
 'RUNX1',
 'IRF2BP2',
 'FLI1',
 'ELF2',
 'ZEB2',
 'GFI1',
 'LMO2',
 'CEBPa',
 'MEF2D',
 'MEF2C',
 'IRF8',
 'MEIS1',
 'RUNX2',
 'RUNX2',
 'ZMYND8']]
In [ ]:
show(a)
In [ ]:
deseq_ctf = deseq.loc[['MYC',
 'MYB',
 'SPI1',
 'RUNX1',
 'IRF2BP2',
 'FLI1',
 'ELF2',
 'ZEB2',
 'GFI1',
 'LMO2',
 'CEBPa',
 'MEF2D',
 'MEF2C',
 'IRF8',
 'MEIS1',
 'RUNX2',
 'ZMYND8']]
model = AgglomerativeClustering(n_clusters=7,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(deseq_ctf)
ii = itertools.count(deseq_ctf.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
sort = labels.argsort()
a = deseq_ctf.values[sort]
a = plotCorrelationMatrix(a, deseq_ctf.index[sort].tolist(),interactive=True)
In [ ]:
show(a)
In [ ]:
model = AgglomerativeClustering(n_clusters=7,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(deseq)
ii = itertools.count(deseq.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
sort = labels.argsort()
a = deseq.values[sort]
a = plotCorrelationMatrix(a, deseq.index[sort].tolist(),interactive=True)
In [ ]:
show(a)

tsne, pca, clustering accross TF, CRC, most var genes, both ways.